Dependencies

if (!require(tidyverse))
  install.packages("tidyverse")
Loading required package: tidyverse
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
── Attaching packages ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.1 ──
✓ ggplot2 3.3.3     ✓ purrr   0.3.4
✓ tibble  3.1.2     ✓ dplyr   1.0.6
✓ tidyr   1.1.3     ✓ stringr 1.4.0
✓ readr   1.4.0     ✓ forcats 0.5.1
── Conflicts ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
library(tidyverse)
if (!require(viridis))
  install.packages("viridis")
Loading required package: viridis
Loading required package: viridisLite
library(viridis)
if (!require(gh))
  install.packages("gh")
Loading required package: gh
library(gh)
if (!require(jsonlite))
  install.packages("jsonlite")
Loading required package: jsonlite

Attaching package: ‘jsonlite’

The following object is masked from ‘package:purrr’:

    flatten
library(jsonlite)
if (!require(DT))
  install.packages('DT')
Loading required package: DT
Registered S3 method overwritten by 'htmlwidgets':
  method           from         
  print.htmlwidget tools:rstudio
library(DT)

Configure GH API

Set ghToken to a GitHub personal access token with public access in order to authenticate with the GitHub API.

# Token attached to every authenticated GitHub API call made via ghAuth().
# The value below is a placeholder.
ghToken <- "ghp_XXX"

# Thin wrapper around gh(): forwards all arguments unchanged and supplies
# ghToken as the `.token` argument.
ghAuth <- function(...) {
  gh(
    ...,
    .token = ghToken
  )
}
# Fetch all pages of a paginated GitHub API endpoint through ghAuth().
#
# Arguments:
#   ...  Forwarded to ghAuth() on every request (endpoint plus its
#        parameters). `per_page` and `page` are supplied by this function.
#
# Returns:
#   A list holding the elements of every fetched page, appended in page
#   order. Returns NA when the first page fails with an "http_error_404"
#   condition, or when anyNA() flags a fetched page.
#
# Errors:
#   Any other error (including a 404 on a later page) is re-raised to the
#   caller instead of being silently turned into NA.
ghAllPages <- function(...) {
  result <- list()
  i <- 0
  repeat {
    i <- i + 1
    # Use the value of tryCatch() so the handler's result is not discarded.
    newElements <- tryCatch(
      ghAuth(..., per_page = 100, page = i),
      error = function(e) {
        if (inherits(e, "http_error_404") && i == 1) {
          return(NA)
        }
        # Re-signal the original condition so real failures stay visible.
        stop(e)
      }
    )
    if (anyNA(newElements)) return(NA)
    result <- append(result, newElements)
    # A page with fewer than 100 entries is the last one.
    if (length(newElements) < 100) return(result)
  }
}

Load Repos

# Read the dataset and keep only its Project.URLs column, renamed to repoUrl.
repo_urls <- transmute(
  read.csv2("./dataset.csv"),
  repoUrl = Project.URLs
)
repo_urls

Releases and Tags

To fetch tags and releases, uncomment the download and JSON export code below.

#repoTagsReleases <- repo_urls %>%
#  mutate(owner=map_chr(repoUrl, ~ str_split(.x, '/')[[1]][4]),
#         repo=map_chr(repoUrl, ~ str_split(.x, '/')[[1]][5]),
#         tags=pmap(list('GET /repos/{owner}/{repo}/tags', owner=owner, repo=repo), ghAllPages),
#         tagsCount=map_int(tags, ~ if (anyNA(.x)) NA else length(.x)),
#         releases=pmap(list('GET /repos/{owner}/{repo}/releases', owner=owner, repo=repo), ghAllPages),
#         releasesCount=map_int(releases, ~ if (anyNA(.x)) NA else length(.x)))
#
#exportJSON <- toJSON(repoTagsReleases)
#write(exportJSON, 'repositories-tags-releases.json')

# Load the previously exported tags/releases data and show it as a table.
repoTagsReleases <- fromJSON("repositories-tags-releases.json")
datatable(repoTagsReleases)
Warning in instance$preRenderHook(instance) :
  It seems your data is too big for client-side DataTables. You may consider server-side processing: https://rstudio.github.io/DT/server.html
Warning in instance$preRenderHook(instance) :
  It seems your data is too big for client-side DataTables. You may consider server-side processing: https://rstudio.github.io/DT/server.html

GitHub Actions Workflows

# Read the GitHub Actions workflow paths into a one-column data frame and
# take the third "/"-separated component of each path as the project name.
ghActionWorkflows <- mutate(
  read.csv2(
    "./gh-actions-workflows.txt",
    header = FALSE,
    col.names = c("ghActionWorkflow")
  ),
  project = map_chr(str_split(ghActionWorkflow, "/"), ~ .x[3])
)

# One row per project: the list of its workflow paths and how many there are.
ghActionWorkflowRepos <- ghActionWorkflows %>%
  group_by(project) %>%
  summarise(
    ghActionWorkflows = list(ghActionWorkflow),
    ghActionsWorkflowsCount = n()
  ) %>%
  ungroup()
datatable(ghActionWorkflowRepos)

Amazon States Language

# Read the Amazon States Language workflow paths into a one-column data frame
# and take the third "/"-separated component of each path as the project name.
aslWorkflows <- mutate(
  read.csv2(
    "./asl.txt",
    header = FALSE,
    col.names = c("aslWorkflow")
  ),
  project = map_chr(str_split(aslWorkflow, "/"), ~ .x[3])
)

# One row per project: the list of its workflow paths and how many there are.
aslWorkflowRepos <- aslWorkflows %>%
  group_by(project) %>%
  summarise(
    aslWorkflows = list(aslWorkflow),
    aslWorkflowsCount = n()
  ) %>%
  ungroup()
datatable(aslWorkflowRepos)

Airflow

# Read the Airflow workflow paths into a one-column data frame and take the
# third "/"-separated component of each path as the project name.
airflowWorkflows <- mutate(
  read.csv2(
    "./airflow.txt",
    header = FALSE,
    col.names = c("airflowWorkflow")
  ),
  project = map_chr(str_split(airflowWorkflow, "/"), ~ .x[3])
)

# One row per project: the list of its workflow paths and how many there are.
airflowWorkflowRepos <- airflowWorkflows %>%
  group_by(project) %>%
  summarise(
    airflowWorkflows = list(airflowWorkflow),
    airflowWorkflowsCount = n()
  ) %>%
  ungroup()
datatable(airflowWorkflowRepos)

Summary

# Build the "owner_repo" project key on the tags/releases table, then
# full-join the three per-project workflow tables onto it in order
# (GitHub Actions, ASL, Airflow), always matching on `project`.
reposSummary <- reduce(
  list(ghActionWorkflowRepos, aslWorkflowRepos, airflowWorkflowRepos),
  full_join,
  by = "project",
  .init = mutate(repoTagsReleases, project = paste(owner, repo, sep = "_"))
)
datatable(reposSummary)
Warning in instance$preRenderHook(instance) :
  It seems your data is too big for client-side DataTables. You may consider server-side processing: https://rstudio.github.io/DT/server.html
Warning in instance$preRenderHook(instance) :
  It seems your data is too big for client-side DataTables. You may consider server-side processing: https://rstudio.github.io/DT/server.html
LS0tCnRpdGxlOiAiQWRkaXRpb25hbCBXb25kZXJsZXNzIFN0YXRzIgpvdXRwdXQ6CiAgaHRtbF9ub3RlYm9vazoKICAgIGNvZGVfZm9sZGluZzogaGlkZQogICAgdG9jOiB5ZXMKLS0tCgo8c3R5bGUgdHlwZT0idGV4dC9jc3MiPgoubWFpbi1jb250YWluZXIgewogIG1heC13aWR0aDogaW5oZXJpdDsKfQouZGF0YXRhYmxlIHsKICBvdmVyZmxvdy14OiBhdXRvOwp9Cjwvc3R5bGU+CgoKYGBge3IsIGluY2x1ZGUgPSBGQUxTRX0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KHdhcm5pbmc9RkFMU0UsIG1lc3NhZ2U9RkFMU0UpCmBgYAoKIyBEZXBlbmRlbmNpZXMKCmBgYHtyfQppZiAoIXJlcXVpcmUodGlkeXZlcnNlKSkKICBpbnN0YWxsLnBhY2thZ2VzKCJ0aWR5dmVyc2UiKQpsaWJyYXJ5KHRpZHl2ZXJzZSkKaWYgKCFyZXF1aXJlKHZpcmlkaXMpKQogIGluc3RhbGwucGFja2FnZXMoInZpcmlkaXMiKQpsaWJyYXJ5KHZpcmlkaXMpCmlmICghcmVxdWlyZShnaCkpCiAgaW5zdGFsbC5wYWNrYWdlcygiZ2giKQpsaWJyYXJ5KGdoKQppZiAoIXJlcXVpcmUoanNvbmxpdGUpKQogIGluc3RhbGwucGFja2FnZXMoImpzb25saXRlIikKbGlicmFyeShqc29ubGl0ZSkKaWYgKCFyZXF1aXJlKERUKSkKICBpbnN0YWxsLnBhY2thZ2VzKCdEVCcpCmxpYnJhcnkoRFQpCmBgYAoKIyBDb25maWd1cmUgR0ggQVBJCgpTZXQgYGdoVG9rZW5gIEdpdEh1YiBwZXJzb25hbCBhY2Nlc3MgdG9rZW4gd2l0aCBwdWJsaWMgYWNjZXNzIGluIG9yZGVyIHRvIGF1dGhlbnRpY2F0ZSB3aXRoIHRoZWlyIEFQSS4KCmBgYHtyfQpnaFRva2VuIDwtICdnaHBfWFhYJwoKZ2hBdXRoIDwtIGZ1bmN0aW9uKC4uLikgewogIGdoKC4uLiwgLnRva2VuPWdoVG9rZW4pCn0KZ2hBbGxQYWdlcyA8LSBmdW5jdGlvbiguLi4pIHsKICByZXN1bHQgPC0gbGlzdCgpCiAgaSA8LSAwCiAgcmVwZWF0IHsKICAgIGkgPC0gaSArIDEKICAgIG5ld0VsZW1lbnRzIDwtIE5BCiAgICB0cnlDYXRjaCh7CiAgICAgIG5ld0VsZW1lbnRzIDwtIGdoQXV0aCguLi4sIHBlcl9wYWdlPTEwMCwgcGFnZT1pKQogICAgfSwgZXJyb3I9ZnVuY3Rpb24oZSkgewogICAgICBpZiAoaXMoZSwgJ2h0dHBfZXJyb3JfNDA0JykgJiBpID09IDEpIHJldHVybihOQSkKICAgICAgZWxzZSByZXR1cm4oZSkKICAgIH0pCiAgICBpZiAoYW55TkEobmV3RWxlbWVudHMpKSByZXR1cm4oTkEpCiAgICByZXN1bHQgPC0gYXBwZW5kKHJlc3VsdCwgbmV3RWxlbWVudHMpCiAgICBpZiAobGVuZ3RoKG5ld0VsZW1lbnRzKSA8IDEwMCkgcmV0dXJuKHJlc3VsdCkKICB9Cn0KYGBgCgojIExvYWQgUmVwb3MKCmBgYHtyfQpyZXBvX3VybHMgPC0gcmVhZC5jc3YyKCcuL2RhdGFzZXQuY3N2JykgJT4lIHRyYW5zbXV0ZShyZXBvVXJsID0gUHJvamVjdC5VUkxzKQpyZXBvX3VybHMKYGBgCgojIFJlbGVhc2VzIGFuZCBUYWdzCgpUbyBmZXRjaCB0YWdzIGFuZCByZWxlYXNlcyB1bmNvbW1lbnQgZG93bmxvYWQgYW5kIEpTT04gZXhwb3J0IGNvZGUuCgpgYGB7
cn0KI3JlcG9UYWdzUmVsZWFzZXMgPC0gcmVwb191cmxzICU+JQojICBtdXRhdGUob3duZXI9bWFwX2NocihyZXBvVXJsLCB+IHN0cl9zcGxpdCgueCwgJy8nKVtbMV1dWzRdKSwKIyAgICAgICAgIHJlcG89bWFwX2NocihyZXBvVXJsLCB+IHN0cl9zcGxpdCgueCwgJy8nKVtbMV1dWzVdKSwKIyAgICAgICAgIHRhZ3M9cG1hcChsaXN0KCdHRVQgL3JlcG9zL3tvd25lcn0ve3JlcG99L3RhZ3MnLCBvd25lcj1vd25lciwgcmVwbz1yZXBvKSwgZ2hBbGxQYWdlcyksCiMgICAgICAgICB0YWdzQ291bnQ9bWFwX2ludCh0YWdzLCB+IGlmIChhbnlOQSgueCkpIE5BIGVsc2UgbGVuZ3RoKC54KSksCiMgICAgICAgICByZWxlYXNlcz1wbWFwKGxpc3QoJ0dFVCAvcmVwb3Mve293bmVyfS97cmVwb30vcmVsZWFzZXMnLCBvd25lcj1vd25lciwgcmVwbz1yZXBvKSwgZ2hBbGxQYWdlcyksCiMgICAgICAgICByZWxlYXNlc0NvdW50PW1hcF9pbnQocmVsZWFzZXMsIH4gaWYgKGFueU5BKC54KSkgTkEgZWxzZSBsZW5ndGgoLngpKSkKIwojZXhwb3J0SlNPTiA8LSB0b0pTT04ocmVwb1RhZ3NSZWxlYXNlcykKI3dyaXRlKGV4cG9ydEpTT04sICdyZXBvc2l0b3JpZXMtdGFncy1yZWxlYXNlcy5qc29uJykKCnJlcG9UYWdzUmVsZWFzZXMgPC0gZnJvbUpTT04oJ3JlcG9zaXRvcmllcy10YWdzLXJlbGVhc2VzLmpzb24nKQpyZXBvVGFnc1JlbGVhc2VzICU+JSBkYXRhdGFibGUoKQpgYGAKCiMgR2l0SHViIEFjdGlvbnMgV29ya2Zsb3dzCgpgYGB7cn0KZ2hBY3Rpb25Xb3JrZmxvd3MgPC0gcmVhZC5jc3YyKCcuL2doLWFjdGlvbnMtd29ya2Zsb3dzLnR4dCcsIGhlYWRlcj1GQUxTRSwgY29sLm5hbWVzPWMoJ2doQWN0aW9uV29ya2Zsb3cnKSkgJT4lCiAgbXV0YXRlKHByb2plY3Q9bWFwX2NocihnaEFjdGlvbldvcmtmbG93LCB+IHN0cl9zcGxpdCgueCwgJy8nKVtbMV1dWzNdKSkKZ2hBY3Rpb25Xb3JrZmxvd1JlcG9zIDwtIGdoQWN0aW9uV29ya2Zsb3dzICU+JSBncm91cF9ieShwcm9qZWN0KSAlPiUgc3VtbWFyaXNlKGdoQWN0aW9uV29ya2Zsb3dzPWxpc3QoZ2hBY3Rpb25Xb3JrZmxvdyksIGdoQWN0aW9uc1dvcmtmbG93c0NvdW50PW4oKSkgJT4lIHVuZ3JvdXAoKQpnaEFjdGlvbldvcmtmbG93UmVwb3MgJT4lIGRhdGF0YWJsZSgpCmBgYAoKIyBBbWF6b24gU3RhdGVzIExhbmd1YWdlCgpgYGB7cn0KYXNsV29ya2Zsb3dzIDwtIHJlYWQuY3N2MignLi9hc2wudHh0JywgaGVhZGVyPUZBTFNFLCBjb2wubmFtZXM9YygnYXNsV29ya2Zsb3cnKSkgJT4lCiAgbXV0YXRlKHByb2plY3Q9bWFwX2Nocihhc2xXb3JrZmxvdywgfiBzdHJfc3BsaXQoLngsICcvJylbWzFdXVszXSkpCmFzbFdvcmtmbG93UmVwb3MgPC0gYXNsV29ya2Zsb3dzICU+JSBncm91cF9ieShwcm9qZWN0KSAlPiUgc3VtbWFyaXNlKGFzbFdvcmtmbG93cz1saXN0KGFzbFdvcmtmbG93KSwgYXNsV29ya2Zsb3dzQ291bnQ9bigpKSAlPiUgdW5ncm91cCgpCmFzbFdvcmtmbG93UmVwb3MgJT4lIGRhdGF0YWJs
ZSgpCmBgYAoKIyBBaXJmbG93CgpgYGB7cn0KYWlyZmxvd1dvcmtmbG93cyA8LSByZWFkLmNzdjIoJy4vYWlyZmxvdy50eHQnLCBoZWFkZXI9RkFMU0UsIGNvbC5uYW1lcz1jKCdhaXJmbG93V29ya2Zsb3cnKSkgJT4lCiAgbXV0YXRlKHByb2plY3Q9bWFwX2NocihhaXJmbG93V29ya2Zsb3csIH4gc3RyX3NwbGl0KC54LCAnLycpW1sxXV1bM10pKQphaXJmbG93V29ya2Zsb3dSZXBvcyA8LSBhaXJmbG93V29ya2Zsb3dzICU+JSBncm91cF9ieShwcm9qZWN0KSAlPiUgc3VtbWFyaXNlKGFpcmZsb3dXb3JrZmxvd3M9bGlzdChhaXJmbG93V29ya2Zsb3cpLCBhaXJmbG93V29ya2Zsb3dzQ291bnQ9bigpKSAlPiUgdW5ncm91cCgpCmFpcmZsb3dXb3JrZmxvd1JlcG9zICU+JSBkYXRhdGFibGUoKQpgYGAKCiMgU3VtbWFyeQoKYGBge3J9CnJlcG9zU3VtbWFyeSA8LSByZXBvVGFnc1JlbGVhc2VzICU+JSBtdXRhdGUocHJvamVjdD1wYXN0ZShvd25lciwgcmVwbywgc2VwPSdfJykpICU+JQogIGZ1bGxfam9pbihnaEFjdGlvbldvcmtmbG93UmVwb3MsIGJ5PSdwcm9qZWN0JykgJT4lCiAgZnVsbF9qb2luKGFzbFdvcmtmbG93UmVwb3MsIGJ5PSdwcm9qZWN0JykgJT4lCiAgZnVsbF9qb2luKGFpcmZsb3dXb3JrZmxvd1JlcG9zLCBieT0ncHJvamVjdCcpCnJlcG9zU3VtbWFyeSAlPiUgZGF0YXRhYmxlKCkKYGBgCg==